#####################################################################
# Cook, Blas, Carroll, and Sinha 09/16/2016
# For "Two Wrongs Make a Right"  
# To Reproduce Figure 1 
#####################################################################

setwd("C:/Users/sjcook/Desktop/Misclass_Illustration/") ###Users should set working directory###
rm(list = ls()) 

#####################################################################
# Load packages & source user-written estimators 
#####################################################################

install.packages("mvtnorm")
install.packages("ggplot2")
library(mvtnorm)
library(ggplot2)

require(devtools)
install_url("http://CRAN.R-project.org/src/contrib/Archive/McSpatial/McSpatial_1.1.1.tar.gz")
library(McSpatial)

source("misclass_phi1.R")
source("misclass_a1_a2.R")
source("probit.lf.R")

#####################################################################
# Set the seed
#####################################################################
set.seed(38212839)
#####################################################################
# Set number of simulated data sets
#####################################################################
trials= 1000
runs = 51  #Used in loop over different correlations of misclassification below  
#####################################################################
# Pre-allocate vectors and matrices for results 
#####################################################################
coeffs.probit <- matrix(data=NA,nrow=trials,ncol=5)
stderr.probit <- matrix(data=NA,nrow=trials,ncol=4)
coeffs.probit.Fa1.Fa2 <- matrix(data=NA,nrow=trials,ncol=8)
stderr.probit.Fa1.Fa2 <- matrix(data=NA,nrow=trials,ncol=8)
Fa1.a2<- matrix(data=NA,nrow=trials,ncol=2)
naive.coeff.probit <- matrix(data=NA,nrow=trials,ncol=2)
naive.stderr.probit <- matrix(data=NA,nrow=trials,ncol=2)
haus.coeff <- matrix(data=NA,nrow=trials,ncol=3)
haus.stderr <- matrix(data=NA,nrow=trials,ncol=3)
haus.coeff.phi0.phi1 <- matrix(data=NA,nrow=trials,ncol=6)
haus.stderr.phi0.phi1 <- matrix(data=NA,nrow=trials,ncol=6)
#Covering and I.C width 
M1.width.cover <- matrix(data=NA,nrow=trials,ncol=4)
M2.width.cover<- matrix(data=NA,nrow=trials,ncol=4)
M3.width.cover<- matrix(data=NA,nrow=trials,ncol=4)
M4.width.cover<- matrix(data=NA,nrow=trials,ncol=4)
M5.width.cover<- matrix(data=NA,nrow=trials,ncol=4)

count.misclass<-vector()
count.misclass.sample<-vector()

convergenceM3<-vector()
convergenceM4<-vector()
convergenceM5<-vector()

mse.probit.b0 <- matrix(data=NA,nrow=runs,ncol=5)
mse.probit.b1 <- matrix(data=NA,nrow=runs,ncol=5)
cover.probit.b0 <- matrix(data=NA,nrow=runs,ncol=5)
cover.probit.b1 <- matrix(data=NA,nrow=runs,ncol=5)

#####################################################################
#Experimental Conditions 
#Note: Experiment #2 with correlated misclassification probabilities 
#####################################################################

#Outer loop (runs) over different correlations 
#Inner loop (trials) runs 1000 simulations per level of correlation 

for(k in 1:runs){  
for(i in 1:trials){
  
  n <- 1000
  x1 <- rnorm(n) 
  x2 <- rnorm(n) 
  x3 <- rnorm(n)
  
  #####################################################################
  # Generate the true binary response y_true
  #####################################################################
  beta1_true <- 1
  beta0_true<- -1
  lm <-  beta0_true + beta1_true*x1 
  pr.probit<-pnorm(lm)  
  y_true <- rbinom(n,1,pr.probit)
  
  #####################################################################
  # Generate the misclassifed reports for source 1 & 2 
  #####################################################################
  
  ################
  # u is a bivariate normal included in source misclassification below 
  # to induce correlation across the sources 
  ################
  
  Sigma <- matrix(c(1, -0.52+(k*2)/100, -0.52+(k*2)/100 , 1),2,2)
  u <- rmvnorm(n,rep(0,2),Sigma)

  ################
  # Generate the misclassifed reports for source 1  (y_1 in text) 
  ################
  
  delta2 <- 1
  beta2 <- 1
  
  lm_a1 <- -0.7  + delta2*x1 + beta2*x2 + u[,1]
  pr_a1.probit<- pnorm(lm_a1) 
  alpha1.probit <- rbinom(n,1,pr_a1.probit)
  y1 <- y_true*(1-alpha1.probit) 

  ################
  # Generate the misclassifed reports for source 2 (y_2 in text)
  ################
  
  delta3 <- 1
  beta3 <- 1
  
  lm_a2 <- -1.4  +  delta3*x1 + beta3*x3 + u[,2]
  pr_a2.probit <- pnorm(lm_a2) 
  alpha2.probit <- rbinom(n,1,pr_a2.probit)
  y2 <- y_true*(1-alpha2.probit)  

  #####################################################################
  # Aggregate these sources into 1 observed outcome (y_sum in text)
  #####################################################################
  
  y_both <- as.numeric(y1+y2>=1)
  
  #####################################################################
  #  METHOD 1: Naive probit 
  #####################################################################
  tryCatch({naive.out.probit <- glm(y_both ~ x1,family=binomial(link="probit"))
  naive.coeff.probit[i,] <- summary(naive.out.probit)$coef[1:2]
  naive.stderr.probit[i,] <- summary(naive.out.probit)$coef[1:2,2]
  
  UL.b0.M1<-naive.coeff.probit[i,1]+1.96* naive.stderr.probit[i,1]
  LL.b0.M1<-naive.coeff.probit[i,1]-1.96* naive.stderr.probit[i,1]
  UL.b1.M1<-naive.coeff.probit[i,2]+1.96* naive.stderr.probit[i,2]
  LL.b1.M1<-naive.coeff.probit[i,2]-1.96* naive.stderr.probit[i,2]
  Cover.b0.M1<-ifelse( (beta0_true>= LL.b0.M1)& (beta0_true<= UL.b0.M1),1,0)
  Cover.b1.M1<-ifelse( (beta1_true>= LL.b1.M1)& (beta1_true<= UL.b1.M1),1,0)
  
  M1.width.cover[i,]=c(2*1.96* naive.stderr.probit[i,1], Cover.b0.M1, 2*1.96* naive.stderr.probit[i,2], Cover.b1.M1)
  }, error = function(cond) NA)  

  #####################################################################
  #  METHOD 2: Hausman, et al misclassification estimator 
  #  (Assuming \pi_1 as constant and \pi_0=0)
  #####################################################################
  
  tryCatch({
    haus.out <- misclass(y_both ~ x1, a0=FALSE) 
    haus.coeff[i,] <- c(haus.out$estimate[1:2], haus.out$a1)
    ta1<-haus.out$estimate[3]/haus.out$stderr[3]
    haus.sd.a1=abs(haus.out$a1/ta1)
    haus.stderr[i,] <- c(haus.out$stderr[1:2],  haus.sd.a1)
    
    ### Width and Coverage
    UL.b0.M2<-haus.coeff[i,1]+1.96*haus.stderr[i,1]
    LL.b0.M2<-haus.coeff[i,1]-1.96*haus.stderr[i,1]
    UL.b1.M2<-haus.coeff[i,2]+1.96*haus.stderr[i,2]
    LL.b1.M2<-haus.coeff[i,2]-1.96*haus.stderr[i,2]
    Cover.b0.M2<-ifelse( (beta0_true>= LL.b0.M2)& (beta0_true<= UL.b0.M2),1,0)
    Cover.b1.M2<-ifelse( (beta1_true>= LL.b1.M2)& (beta1_true<= UL.b1.M2),1,0)
    
    M2.width.cover[i,]=c(2*1.96*haus.stderr[i,1], Cover.b0.M2, 2*1.96*haus.stderr[i,2], Cover.b1.M2)
  }, error = function(cond) NA) 
  
  
  #####################################################################
  #  METHOD  3: Hausman, et al misclassification estimator
  #  (assuming \pi_1 as probit function and \pi_0=0)
  #####################################################################
  
  tryCatch({
    haus.out.phi0.phi1 <- misclass_phi1(y_both ~ x1+x2+x3) 
    
    if(haus.out.phi0.phi1$convergence==0){
      haus.coeff.phi0.phi1[i,] <- c(haus.out.phi0.phi1$estimate)
      haus.stderr.phi0.phi1[i,] <-haus.out.phi0.phi1$stderr
      
      ### Width and Coverage
      UL.b0.M3<- haus.coeff.phi0.phi1[i,1]+1.96*haus.stderr.phi0.phi1[i,1]
      LL.b0.M3<- haus.coeff.phi0.phi1[i,1]-1.96*haus.stderr.phi0.phi1[i,1]
      UL.b1.M3<- haus.coeff.phi0.phi1[i,2]+1.96*haus.stderr.phi0.phi1[i,2]
      LL.b1.M3<- haus.coeff.phi0.phi1[i,2]-1.96*haus.stderr.phi0.phi1[i,2]
      Cover.b0.M3<-ifelse( (beta0_true>= LL.b0.M3)& (beta0_true<= UL.b0.M3),1,0)
      Cover.b1.M3<-ifelse( (beta1_true>= LL.b1.M3)& (beta1_true<= UL.b1.M3),1,0)
      
      M3.width.cover[i,]=c(2*1.96*haus.stderr.phi0.phi1[i,1], Cover.b0.M3, 2*1.96*haus.stderr.phi0.phi1[i,2], Cover.b1.M3)
    }
    
    convergenceM3[i]<- haus.out.phi0.phi1$convergence
  }, error = function(cond) NA) 
  
  
  
  #####################################################################
  #  METHOD 4: Cook, et al. joint likelihood estimator 
  #  (assuming \alpha_1 and \alpha_2 as constants)  
  #####################################################################
  
  tryCatch({  probitmodel <-misclass_a1_a2(y_both~x1, y1,y2)
              nk=ncol(as.matrix(x1))+1
              
              if(  probitmodel$convergence==0){
                
                coeffs.probit[i,] <- c(probitmodel$estimate[1:nk], probitmodel$a1,probitmodel$a2, probitmodel$a1*probitmodel$a2) 
                stderr.probit[i,] <- c(probitmodel$stderr[1:nk], probitmodel$smat[1],probitmodel$smat[2])
                
                ### Width and Coverage
                UL.b0.M4<-coeffs.probit[i,1]+1.96*stderr.probit[i,1]
                LL.b0.M4<-coeffs.probit[i,1]-1.96* stderr.probit[i,1]
                UL.b1.M4<-coeffs.probit[i,2]+1.96* stderr.probit[i,2]
                LL.b1.M4<-coeffs.probit[i,2]-1.96* stderr.probit[i,2]
                Cover.b0.M4<-ifelse( (beta0_true>= LL.b0.M4)& (beta0_true<= UL.b0.M4),1,0)
                Cover.b1.M4<-ifelse( (beta1_true>= LL.b1.M4)& (beta1_true<= UL.b1.M4),1,0)
                
                M4.width.cover[i,]=c(2*1.96*stderr.probit[i,1], Cover.b0.M4, 2*1.96*stderr.probit[i,2], Cover.b1.M4)
              }
              
              convergenceM4[i]<- probitmodel$convergence
  }, error = function(cond) NA)
  
  #####################################################################
  #  METHOD 5: Cook, et al. joint likelihood estimator 
  #  (assuming \alpha_1(X,Z) and \alpha_2(X,Z) are probit) 
  #####################################################################
  
  tryCatch({
    X0 <- cbind(1, x1)
    X1 <- cbind(1, x1, x2)
    X2 <- cbind(1, x1, x3)
    K <- as.numeric(ncol(X0) + ncol(X1) + ncol(X2) )
    startv = rep(0,K)
    
    probitmodel.Fa1.Fa2 <- optim(startv, probit.lf, gr=NULL, method="BFGS",control=list(maxit=500), hessian=TRUE) # the function probit.lf is defined above
    
    if(  probitmodel.Fa1.Fa2$convergence==0){
      coeffs.probit.Fa1.Fa2[i,] <- probitmodel.Fa1.Fa2$par
      covmat.probit.Fa1.Fa2 <- solve(probitmodel.Fa1.Fa2$hessian)
      stderr.probit.Fa1.Fa2[i,] <- sqrt(diag(covmat.probit.Fa1.Fa2))
      
      ### Width and Coverage
      UL.b0.M5<-coeffs.probit.Fa1.Fa2[i,1]+1.96*stderr.probit.Fa1.Fa2[i,1]
      LL.b0.M5<-coeffs.probit.Fa1.Fa2[i,1]-1.96*stderr.probit.Fa1.Fa2[i,1]
      UL.b1.M5<-coeffs.probit.Fa1.Fa2[i,2]+1.96*stderr.probit.Fa1.Fa2[i,2]
      LL.b1.M5<-coeffs.probit.Fa1.Fa2[i,2]-1.96*stderr.probit.Fa1.Fa2[i,2]
      Cover.b0.M5<-ifelse( (beta0_true>= LL.b0.M5)& (beta0_true<= UL.b0.M5),1,0)
      Cover.b1.M5<-ifelse( (beta1_true>= LL.b1.M5)& (beta1_true<= UL.b1.M5),1,0)
      
      M5.width.cover[i,]=c(2*1.96*stderr.probit.Fa1.Fa2[i,1], Cover.b0.M5, 2*1.96*stderr.probit.Fa1.Fa2[i,2], Cover.b1.M5)
      
      ###
      # The estimate of \alpha_1(X,Z) and \alpha_2(X,Z)  can be obtained as
      ###
      
      boa1=probitmodel.Fa1.Fa2$par[3]
      b1a1=probitmodel.Fa1.Fa2$par[4]
      b2a1=probitmodel.Fa1.Fa2$par[5]
      lm_a1 <-  boa1 +  b1a1*mean(x1) +  b2a1*mean(x2) 
      
      boa2=probitmodel.Fa1.Fa2$par[6]
      b1a2=probitmodel.Fa1.Fa2$par[7]
      b2a2=probitmodel.Fa1.Fa2$par[8]
      lm_a2 <-  boa2 +  b1a2*mean(x1) +  b2a2*mean(x3) 
      
      Fa1<-1/(1+exp(- lm_a1))
      Fa2<-1/(1+exp(- lm_a2))
      Fa1.a2[i,]<-c(Fa1,Fa2) # estore estimates of \alpha_1(X,Z) and \alpha_2(X,Z)
    }
    convergenceM5[i]<-  probitmodel.Fa1.Fa2$convergence
  }, error = function(cond) NA)    
  
  
  ######################################################################
  #How many misclassifications (#y_sum different of y_true) we have in each sample:
  
  count.misclass[i]=0
  for (j in 1:n){count.misclass[i]=count.misclass[i]+ifelse(y_both[j]==y_true[j],0,1)}
  
  # Number of sample with misclassifications
  count.misclass.sample[i]=ifelse(count.misclass[i]==0,0,1)
  
} 


#####################################################################
#####################################################################
# Number of  failures to converge 
#####################################################################
naive.fail = sum(is.na(naive.coeff.probit[,1])) 
haus.fail = sum(is.na(haus.coeff[,1]))
haus.fail.phi0.phi1 = trials-(length(convergenceM3)-sum(is.na(convergenceM3)))
our.method.fail = trials-(length(convergenceM5)-sum(is.na(convergenceM5))) 
our.method.fail.Fa1.Fa2 = trials-(length(convergenceM4)-sum(is.na(convergenceM4)))


#####################################################################
# Store results of MSE & CP over different runs 
#####################################################################

mse.probit.b0[k,] <- cbind(mean((beta0_true-naive.coeff.probit[,1])^2,na.rm=TRUE), mean((beta0_true-coeffs.probit[,1])^2,na.rm=TRUE), mean((beta0_true-coeffs.probit.Fa1.Fa2[,1])^2,na.rm=TRUE), mean((beta0_true-haus.coeff[,1])^2,na.rm=TRUE), mean((beta0_true-haus.coeff.phi0.phi1[,1])^2,na.rm=TRUE) ) 
mse.probit.b1[k,] <- cbind(mean((beta1_true-naive.coeff.probit[,2])^2,na.rm=TRUE), mean((beta1_true-coeffs.probit[,2])^2,na.rm=TRUE), mean((beta1_true-coeffs.probit.Fa1.Fa2[,2])^2,na.rm=TRUE), mean((beta1_true-haus.coeff[,2])^2,na.rm=TRUE), mean((beta1_true-haus.coeff.phi0.phi1[,2])^2,na.rm=TRUE) ) 

cover.probit.b0[k,]<-cbind(mean(M1.width.cover[,2],na.rm=TRUE)*100,sum(M2.width.cover[,2],na.rm=TRUE)/(trials-haus.fail.phi0.phi1)*100,sum(M3.width.cover[,2],na.rm=TRUE)/(trials-haus.fail)*100,mean(M4.width.cover[,2],na.rm=TRUE)*100,sum(M5.width.cover[,2],na.rm=TRUE)/(trials-our.method.fail)*100)
cover.probit.b1[k,]<-cbind(mean(M1.width.cover[,4],na.rm=TRUE)*100,sum(M2.width.cover[,4],na.rm=TRUE)/(trials-haus.fail.phi0.phi1)*100,sum(M3.width.cover[,4],na.rm=TRUE)/(trials-haus.fail)*100,mean(M4.width.cover[,4],na.rm=TRUE)*100,sum(M5.width.cover[,4],na.rm=TRUE)/(trials-our.method.fail)*100)

}



#####################################################################
#Structure Coverage Probability Data for Plot 
#####################################################################

x = seq(-0.5, 0.50, 0.02)
cover.probit.b1.plus = cbind(cover.probit.b1,x)
colnames(cover.probit.b1.plus) <- c("probit","H-Constant","H-Covariates","MS-Constant","MS-Covariates","Coverage Probabilities")

id = seq(1, 51, 1)
cover.probit.b1.p = cbind(cover.probit.b1.plus,id)
colnames(cover.probit.b1.p) <- c("M.Naive-Probit","M.H-Covariates","M.H-Constant","M.MS-Covariates","M.MS-Constant","Corr","ID")

cplot_data = reshape(as.data.frame(cover.probit.b1.p), 
        varying = c("M.Naive-Probit","M.H-Constant","M.H-Covariates","M.MS-Constant","M.MS-Covariates"), 
        timevar = "Estimator",
        direction = "long",
        idvar = "ID",  
        sep=".")

cplot_data$Estimator <- factor(cplot_data$Estimator, as.character(cplot_data$Estimator))


#####################################################################
#Plot Figure 1 
#####################################################################

p = ggplot(data=cplot_data, aes(x=Corr, y=M, group=Estimator, color=Estimator, linetype=Estimator)) 
p + geom_smooth() +  
  theme_bw() +
  ylab("Coverage Probabilities(%)")+
  xlab("Corr(u1,u2)") + 
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank()) 
